package com.datahole.suffixarray;

import com.datahole.suffixarray.model.Suffix;

/**
 *
 * Build Suffix Array using Prefix Doubling Algorithm
 * see also: Udi Manber and Gene Myers' seminal paper(1991): "Suffix arrays: A new method for on-line string searches"
 * 
 * Copyright (c) 2011 ljs (http://blog.csdn.net/ljsspace/)
 * Licensed under GPL (http://www.opensource.org/licenses/gpl-license.php)
 * 求后缀数组的倍增算法
 * @author ljs
 * 2011-07-17
 *
 */
public class SuffixArrayByDoubling implements SuffixArray{
    public static final char MAX_CHAR = '\uFFFF';
     

     
    //a prefix of suffix[isuffix] represented with digits
    class Tuple{
        int isuffix; //the p-th suffix
        int[] digits;
        public Tuple(int suffix,int[] digits){
            this.isuffix = suffix;
            this.digits = digits;          
        }
        public String toString(){
            StringBuffer sb = new StringBuffer();          
            sb.append(isuffix);
            sb.append("(");
            for(int i=0;i<digits.length;i++){
                sb.append(digits[i]);
                if(i<digits.length-1)
                    sb.append("-");
            }
            sb.append(")");
            return sb.toString();
        }
    }
       
     
    //d: the digit to do countingsort
    //max: A value's range is 0...max
    private void countingSort(int d,Tuple[] tA,Tuple[] tB,int max){
        //init the counter array
        int[] C = new int[max+1];
        for(int i=0;i<=max;i++){
            C[i] = 0;
        }
        //stat the count
        for(int j=0;j<tA.length;j++){
            C[tA[j].digits[d]]++;
        }
        //process the counter array C
        for(int i=1;i<=max;i++){
            C[i]+=C[i-1];
        }
        //distribute the values 
        for(int j=tA.length-1;j>=0;j--){
            //C[A[j]] <= A.length
            tB[--C[tA[j].digits[d]]]=tA[j];        
        }
        C = null;
    }
     
     
    //tA: input
    //tB: output for rank caculation
    private void radixSort(Tuple[] tA,Tuple[] tB,int max,int digitsLen){
        int len = tA.length;
        int digitsTotalLen = tA[0].digits.length;
             
        for(int d=digitsTotalLen-1,j=0;j<digitsLen;d--,j++){
            this.countingSort(d, tA, tB, max);
            //assign tB to tA
            if(j<digitsLen-1){
                for(int i=0;i<len;i++){
                    tA[i] = tB[i];
                }      
            }
        }
    }
    //max is the maximum value in any digit of TA.digits[], used for counting sort
    //tA: input
    //tB: the place holder, reused between iterations
    private Suffix rank(Tuple[] tA,Tuple[] tB,int max,int digitsLen){      
        int len = tA.length;       
        radixSort(tA,tB,max,digitsLen);
         
        int digitsTotalLen = tA[0].digits.length;
         
        //caculate rank and sa 
        int[] sa = new int[len];
        sa[0] = tB[0].isuffix; 
         
        int[] rank = new int[len];     
        int r = 1; //rank starts with 1
        rank[tB[0].isuffix] = r;       
        for(int i=1;i<len;i++){
            sa[i] = tB[i].isuffix; 
             
            boolean equalLast = true;
            for(int j=digitsTotalLen-digitsLen;j<digitsTotalLen;j++){
                if(tB[i].digits[j]!=tB[i-1].digits[j]){
                    equalLast = false;
                    break;
                }
            }
            if(!equalLast){
                r++;
            }
            rank[tB[i].isuffix] = r;   
        }
                  
        Suffix suffix = new Suffix(sa,rank);
        //judge if we are done
        if(r==len){
            suffix.setDone(true);
        }else{
        	suffix.setDone(false);
        }
        sa = null;
        rank = null;
        return suffix;
         
    }
     
 
    //Precondition: the last char in text must be less than other chars.
    public Suffix solve(StringBuffer text){
        if(text == null)return null;
        int len = text.length();
        if(len == 0) return null;
         
        int k=1;
//        char base = text.charAt(len-1); //the smallest char "#"
        int base = 0;
        Tuple[] tA = new Tuple[len];
        Tuple[] tB = new Tuple[len]; //placeholder
        for(int i=0;i<len;i++){
            tA[i] = new Tuple(i,new int[]{0,text.charAt(i)-base});
//            tA[i] = new Tuple(i,new int[]{0,text.charAt(i)});
        }
        Suffix suffix = rank(tA,tB,MAX_CHAR-base,1); 
//        Suffix suffix = rank(tA,tB,MAX_CHAR,1);
        while(!suffix.isDone()){ //no need to decide if: k<=glen
            k<<=1;
            int offset = k>>1;
            for(int i=0,j=i+offset;i<len;i++,j++){      
                tA[i].isuffix = i;
                tA[i].digits=new int[]{suffix.getRank()[i], 
                        (j<len)?suffix.getRank()[i+offset]:0};               
            }
            int max = suffix.getRank()[suffix.getSa()[len-1]];
            suffix = rank(tA,tB,max,2);
        }
        tA = null;
        tB = null;
        return suffix;
    }
}